/*
 *   Copyright (c) 2018. 刘路 All rights reserved
 *   版权所有 刘路 并保留所有权利 2018.
 *   ===============================================================
 *   这不是一个自由软件！您只能在不用于商业目的的前提下对程序代码进行修改和
 *   使用。不允许对程序代码以任何形式任何目的的再发布。如果项目发布携带作者
 *   认可的特殊 LICENSE 则按照 LICENSE 执行，废除上面内容。请保留原作者信息。
 *   ================================================================
 *   刘路（feedback@zhoyq.com）于 2018. 创建
 *   http://zhoyq.com
 */

package com.zhoyq.helper;

import org.apache.http.*;
import org.apache.http.client.HttpClient;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.util.EntityUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.*;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * 数据抓取类
 * @author 刘路
 */
public class DataCrawlingHelper {
  
  private static Logger log = LoggerFactory.getLogger(DataCrawlingHelper.class);
  
  public static HttpClient newClient(){
    HttpClientBuilder builder = HttpClientBuilder.create();
    HttpClient client = builder.build();
    return client;
  }

  /**
   * 发送POST请求
   * @param url
   *            目的地址
   * @param parameters
   *            请求参数，Map类型。
   * @return 远程响应结果
   */
  public static String sendPost(String url, Map<String, String> parameters) {
    // 返回的结果
    String result = "";
    // 读取响应输入流
    BufferedReader in = null;
    PrintWriter out = null;
    // 处理请求参数
    StringBuffer sb = new StringBuffer();
    // 编码之后的参数
    String params = "";
    if (parameters == null) {
      parameters = new HashMap<>(0);
    }
    try {
      // 编码请求参数
      if (parameters.size() == 1) {
        for (String name : parameters.keySet()) {
          sb.append(name).append("=").append( URLEncoder.encode(parameters.get(name), "UTF-8"));
        }
        params = sb.toString();
      } else if(parameters.size()>1){
        for (String name : parameters.keySet()) {
          sb.append(name).append("=").append(
              URLEncoder.encode(parameters.get(name), "UTF-8")).append("&");
        }
        String temp_params = sb.toString();
        params = temp_params.substring(0, temp_params.length() - 1);
      }
      // 创建URL对象
      java.net.URL connURL = new java.net.URL(url);
      // 打开URL连接
      java.net.HttpURLConnection httpConn = (java.net.HttpURLConnection) connURL
          .openConnection();
      // 设置通用属性
      httpConn.setRequestProperty("Accept", "*/*");
      httpConn.setRequestProperty("Connection", "Keep-Alive");
      httpConn.setRequestProperty("User-Agent", "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1)");
      // 设置POST方式
      httpConn.setDoInput(true);
      httpConn.setDoOutput(true);
      // 获取HttpURLConnection对象对应的输出流
      out = new PrintWriter(httpConn.getOutputStream());
      // 发送请求参数 
      out.write(params);
      // flush输出流的缓冲
      out.flush();
      // 定义BufferedReader输入流来读取URL的响应，设置编码方式
      in = new BufferedReader(new InputStreamReader(httpConn
          .getInputStream(), "UTF-8"));
      String line;
      // 读取返回的内容
      while ((line = in.readLine()) != null) {
        result += line;
      }
    } catch (Exception e) {
      e.printStackTrace();
    } finally {
      try {
        if (out != null) {
          out.close();
        }
        if (in != null) {
          in.close();
        }
      } catch (IOException ex) {
        ex.printStackTrace();
      }
    }
    return result;
  }

  /**
   * 发送GET请求
   * 
   * @param url
   *            目的地址
   * @param parameters
   *            请求参数，Map类型。
   * @return 远程响应结果
   */
  public static String sendGet(String url, Map<String, String> parameters) {
    // 返回的结果
    String result = "";
    // 读取响应输入流
    BufferedReader in = null;
    // 存储参数
    StringBuffer sb = new StringBuffer();
    // 编码之后的参数
    String params = "";
    if(parameters == null) {
      parameters = new HashMap<String, String>();
    }
    try {
      // 编码请求参数
      if (parameters.size() == 1) {
        for (String name : parameters.keySet()) {
          sb.append(name).append("=").append(parameters.get(name));
        }
        params = sb.toString();
      } else if(parameters.size()>1){
        for (String name : parameters.keySet()) {
          sb.append(name).append("=").append(parameters.get(name)).append("&");
        }
        String temp_params = sb.toString();
        params = temp_params.substring(0, temp_params.length() - 1);
      }
      String full_url = url + "?" + params;
      // 创建URL对象
      java.net.URL connURL = new java.net.URL(full_url);
      // 打开URL连接
      java.net.HttpURLConnection httpConn = (java.net.HttpURLConnection) connURL
          .openConnection();
      // 设置通用属性
      httpConn.setRequestProperty("Accept", "*/*");
      httpConn.setRequestProperty("Connection", "Keep-Alive");
      httpConn.setRequestProperty("User-Agent",
          "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1)");
      // 建立实际的连接
      httpConn.connect();
      // 响应头部获取
      Map<String, List<String>> headers = httpConn.getHeaderFields();
      // 定义BufferedReader输入流来读取URL的响应,并设置编码方式
      in = new BufferedReader(new InputStreamReader(httpConn
          .getInputStream(), "UTF-8"));
      String line;
      // 读取返回的内容
      while ((line = in.readLine()) != null) {
        result += line;
      }
    } catch (Exception e) {
      e.printStackTrace();
    } finally {
      try {
        if (in != null) {
          in.close();
        }
      } catch (IOException ex) {
        ex.printStackTrace();
      }
    }
    return result;
  }
  
  /**
   * 
   * @param client 统一的client 用于需要登录验证的地方
   * @param url 访问url
   * @param params 访问参数列表
   * @param method 访问方法
   * @param encode 访问编码 UTF-8 GBK
   * @return
   */
  public static String getData(HttpClient client, String url,Map<String,String> params,HttpMethod method,String encode){ 
    HttpResponse res = null;
    
    switch(method){
      case POST:
        HttpPost post = new HttpPost(url); 
        if(params!=null){
          List<NameValuePair> parameters = new ArrayList<NameValuePair>(); 
          params.forEach((key,val)->{
            parameters.add(new BasicNameValuePair(key,val));
          });
          try {
            post.setEntity(new UrlEncodedFormEntity(parameters));
          } catch (UnsupportedEncodingException e) { 
            log.warn(e.getMessage()); 
            log.warn("no parameters init~");
          }
        } 
        try {
          res = client.execute(post);
        } catch (IOException e) { 
          log.warn(e.getMessage());
          log.warn("post exec failed~");
          return null;
        } 
        break;
      case GET:
        
        StringBuilder parameters = new StringBuilder();
        parameters.append("?");
        if(params!=null){ 
          params.forEach((key,val)->{
            String value = null;
            try {
              value = URLEncoder.encode(val,encode);
            } catch (Exception e) {
              log.warn(e.getMessage());
              log.warn("url encoder failed~");
              value = "";
            }
            parameters.append(key).append("=").append(value).append("&");
          });
          parameters.delete(parameters.length()-1, parameters.length());
        } 
        
        String finalurl = url + parameters.toString();
        log.info(finalurl);
        HttpGet get = new HttpGet(finalurl); 
        try {
          res = client.execute(get);
        } catch (IOException e) { 
          log.warn(e.getMessage());
          log.warn("get exec failed~");
          return null;
        } 
        break;
    }
    
    HttpEntity entity = res.getEntity();
    try {
      String result = EntityUtils.toString(entity, encode);
      return result;
    } catch (ParseException | IOException e) {
      log.warn(e.getMessage());
    }
    return null;
  }
  /**
   * 仅适用于单独的链接 不需要任何登录的内容
   * @param url 访问url
   * @param params 访问参数列表
   * @param method 访问方法
   * @param encode 访问编码 UTF-8 GBK
   * @return
   */
  public static String getData(String url,Map<String,String> params,HttpMethod method,String encode){
    return getData(newClient(), url, params, method, encode);
  }
  
  /**
   * post
   * @param url
   * @param json
   */
  public static String getDataByJson(String url,String json,String encode){
    HttpClient client = newClient();
    HttpPost post = new HttpPost(url);
    String response = null;
    try {
      StringEntity s = new StringEntity(json);
      s.setContentEncoding(encode);
      // 发送json数据需要设置contentType
      s.setContentType("application/json");
      post.setEntity(s);
      HttpResponse res = client.execute(post);
      if(res.getStatusLine().getStatusCode() == HttpStatus.SC_OK){
        // 返回json格式：
        response = EntityUtils.toString(res.getEntity());
      }
    } catch (Exception e) { }
    return response;
  }
  
  /**
   * post
   * @param url
   * @param xml
   */
  public static String getDataByXml(String url,String xml,String encode){
    HttpClient client = newClient();
    HttpPost post = new HttpPost(url);
    String response = null;
    try {
      StringEntity s = new StringEntity(xml);
      s.setContentEncoding(encode);
      //发送xml数据需要设置contentType
      s.setContentType("application/xml");
      post.setEntity(s);
      HttpResponse res = client.execute(post);
      if(res.getStatusLine().getStatusCode() == HttpStatus.SC_OK){
        // 返回xml格式：
        response = EntityUtils.toString(res.getEntity());
      }
    } catch (Exception e) { }
    return response;
  }
  
  /**
   * post with header
   * @param url
   * @param json
   */
  public static String getDataByJsonWithHeader(String url,String json,String encode,Map<String,String> header){
    HttpClient client = newClient();
    HttpPost post = new HttpPost(url);
    for(String key:header.keySet()){
      post.setHeader(key, header.get(key));
    }
    String response = null;
    try {
      StringEntity s = new StringEntity(json);
      s.setContentEncoding(encode);
      s.setContentType("application/json;charset=utf-8"); 
      post.setEntity(s);
      HttpResponse res = client.execute(post);
      if(res.getStatusLine().getStatusCode() == HttpStatus.SC_OK){
        response = EntityUtils.toString(res.getEntity()); 
      }
    } catch (Exception e) { }
    return response;
  }
  
  //TODO 未完成方法 
  /**
   * 通过参数获取指定位置的数据
   * {%} 代表查询匹配
   * {*} 代表查询匹配忽略内容（任意长度）
   * {%数字} 代表输出匹配内容 
   * 概念来自 http://feed43.com/
   *          http://feed43.com/feed.html?action=new
   *          http://www.feed43.com/understanding-patterns.html
   *          http://www.feedity.com/
   * @param 
   * @return 
   */
  public static List<String> parttenData(String url,
      Map<String,String> params,HttpMethod method,String encode,
      String searchPattern,String outputTemplate){
    // 通过数据爬取接口获取数据
    String demo = FileHelper.toString("/demo.html");
    // 去掉所有{} 换行符
    String desc = demo.replaceAll("\\{|\\}|\\r|(\\r\\n){1}|\\\n", "");
    // 匹配 所有的内容 searchPattern
    String sp1 = searchPattern.replaceAll("(\\{%\\})|(\\{\\*\\})", "\\(\\.\\)\\*");
    String[] sp2 = sp1.split("\\(\\.\\)\\*");
    String sp3 = "";
    for(int i=0;i<sp2.length;i++){
      if(i==sp2.length-1){
        sp3 += "(" + sp2[i] + "){1}";
      }else{
        sp3 += "(" + sp2[i] + "){1}(.)*?";
      }
    }
    Pattern p = Pattern.compile(sp3);
    Matcher m = p.matcher(desc);

    List<String> srcStr = new ArrayList<String>();
    List<String> resStr = new ArrayList<String>();

    // 得到源字符串
    while(m.find()){
      String buf = m.group();

      String[] sp4 = searchPattern.split("\\{\\*\\}");
      String output = outputTemplate.replaceAll("\\{%(\\d)\\}","%s");
      List<String> param = new ArrayList<String>();
      for(int i=0;i<sp4.length;i++){
        String sp5 = sp4[i];
        String[] sp6 = sp5.split("\\{%\\}");
        String sp7 = "";
        for(int j=0;j<sp6.length;j++){
          if(j==sp6.length-1){
            sp7 += "(?=(" + sp6[j] + "))";
          }else{
            sp7 += "(?<=(" + sp6[j] + "))(.)*?";
          } // (?<=A).*?(?=B)
        }
        Pattern p1 = Pattern.compile(sp7);
        Matcher m1 = p1.matcher(buf);
        if(m1.find()){
          String buff = m1.group();
          param.add(buff);
        }
      }
      String formatStr = String.format(output,param.toArray());
      System.out.println(formatStr);
      resStr.add(formatStr);
      srcStr.add(buf);
    }

    return resStr;
  }

  
  
  
  
}
